{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1.读取train.csv的数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>store_nbr</th>\n",
       "      <th>family</th>\n",
       "      <th>sales</th>\n",
       "      <th>onpromotion</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>AUTOMOTIVE</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>BABY CARE</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>BEAUTY</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>BEVERAGES</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>BOOKS</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id        date  store_nbr      family  sales  onpromotion\n",
       "0   0  2013-01-01          1  AUTOMOTIVE    0.0            0\n",
       "1   1  2013-01-01          1   BABY CARE    0.0            0\n",
       "2   2  2013-01-01          1      BEAUTY    0.0            0\n",
       "3   3  2013-01-01          1   BEVERAGES    0.0            0\n",
       "4   4  2013-01-01          1       BOOKS    0.0            0"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the raw training data; the relative path means train.csv must sit in the\n",
    "# notebook's working directory.\n",
    "df = pd.read_csv(\"train.csv\")\n",
    "# Display the first rows so the table recorded as this cell's output is\n",
    "# reproduced when the notebook is re-run from a fresh kernel.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['AUTOMOTIVE', 'BABY CARE', 'BEAUTY', 'BEVERAGES', 'BOOKS',\n",
       "       'BREAD/BAKERY', 'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS',\n",
       "       'FROZEN FOODS', 'GROCERY I', 'GROCERY II', 'HARDWARE',\n",
       "       'HOME AND KITCHEN I', 'HOME AND KITCHEN II', 'HOME APPLIANCES',\n",
       "       'HOME CARE', 'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE',\n",
       "       'LIQUOR,WINE,BEER', 'MAGAZINES', 'MEATS', 'PERSONAL CARE',\n",
       "       'PET SUPPLIES', 'PLAYERS AND ELECTRONICS', 'POULTRY',\n",
       "       'PREPARED FOODS', 'PRODUCE', 'SCHOOL AND OFFICE SUPPLIES',\n",
       "       'SEAFOOD'], dtype=object)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List the distinct values of the 'family' column (used to build the encoding below).\n",
    "df['family'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>date</th>\n",
       "      <th>store_nbr</th>\n",
       "      <th>family</th>\n",
       "      <th>sales</th>\n",
       "      <th>onpromotion</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2013-01-01</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id        date  store_nbr  family  sales  onpromotion\n",
       "0   0  2013-01-01          1       1    0.0            0\n",
       "1   1  2013-01-01          1       2    0.0            0\n",
       "2   2  2013-01-01          1       3    0.0            0\n",
       "3   3  2013-01-01          1       4    0.0            0\n",
       "4   4  2013-01-01          1       5    0.0            0"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Integer code (1..33) for each value of the 'family' column.\n",
    "kind = {'AUTOMOTIVE':1, 'BABY CARE':2, 'BEAUTY':3, 'BEVERAGES':4, 'BOOKS':5,\n",
    "       'BREAD/BAKERY':6, 'CELEBRATION':7, 'CLEANING':8, 'DAIRY':9, 'DELI':10, 'EGGS':11,\n",
    "       'FROZEN FOODS':12, 'GROCERY I':13, 'GROCERY II':14, 'HARDWARE':15,\n",
    "       'HOME AND KITCHEN I':16, 'HOME AND KITCHEN II':17, 'HOME APPLIANCES':18,\n",
    "       'HOME CARE':19, 'LADIESWEAR':20, 'LAWN AND GARDEN':21, 'LINGERIE':22,\n",
    "       'LIQUOR,WINE,BEER':23, 'MAGAZINES':24, 'MEATS':25, 'PERSONAL CARE':26,\n",
    "       'PET SUPPLIES':27, 'PLAYERS AND ELECTRONICS':28, 'POULTRY':29,\n",
    "       'PREPARED FOODS':30, 'PRODUCE':31, 'SCHOOL AND OFFICE SUPPLIES':32,\n",
    "       'SEAFOOD':33}\n",
    "# Encode only while the column still holds the original labels. Re-running this\n",
    "# cell on the already-encoded column would look up integers in `kind`, find no\n",
    "# match, and silently replace every value with NaN.\n",
    "if not pd.api.types.is_numeric_dtype(df['family']):\n",
    "    df['family'] = df['family'].map(kind)  # numeric values are needed for the logistic regression below\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 10000 entries, 0 to 9999\n",
      "Data columns (total 6 columns):\n",
      "id             10000 non-null int64\n",
      "date           10000 non-null object\n",
      "store_nbr      10000 non-null int64\n",
      "family         10000 non-null int64\n",
      "sales          10000 non-null float64\n",
      "onpromotion    10000 non-null int64\n",
      "dtypes: float64(1), int64(4), object(1)\n",
      "memory usage: 468.8+ KB\n"
     ]
    }
   ],
   "source": [
    "# train.csv is too large to process in full inside Jupyter, so only the first\n",
    "# 10000 rows are kept for building the model.\n",
    "df = df.iloc[:10000]\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2.使用sklearn构建逻辑回归模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0.10533516]\n",
      " [ 0.03188459]\n",
      " [ 0.07866581]\n",
      " ...\n",
      " [-0.00108379]\n",
      " [-0.00108379]\n",
      " [-0.00108379]] [ 6.79585323  4.76263395  3.26405862 ... -0.02884241 -0.02884241\n",
      " -0.02884241]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\86178\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
      "\n",
      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
      "Please also refer to the documentation for alternative solver options:\n",
      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
      "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# With the default iteration limit the lbfgs solver stopped before converging\n",
    "# (ConvergenceWarning), so give it a larger iteration budget.\n",
    "# NOTE(review): if the warning persists, raise max_iter further or scale X.\n",
    "model = LogisticRegression(max_iter=1000)\n",
    "X = df['family'].values.reshape(-1, 1)  # single feature, reshaped to an (n_samples, 1) column\n",
    "y = df['sales'].values.astype(int)  # target array: sales cast to integers, used as class labels\n",
    "model.fit(X, y)  # train the model\n",
    "print(model.coef_, model.intercept_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGh1JREFUeJzt3X+QXeV93/H3RysJe5U4AiF7qH4tqTV2sCeNyR1Mm0yGMSoINxPRGdsjz3asCnm2Y+FWaTpTQ/QHqRPNxG0agieRMpsIIjy3Fgx2iqalIRqZjtNMjFkZggGFaGuzYgNFCwJsuhlYLd/+cZ7bvat798f9fc7dz2tm597zvc89++ho7372Oec55ygiMDMzq7aq1x0wM7P8cTiYmVkNh4OZmdVwOJiZWQ2Hg5mZ1XA4mJlZjSXDQdK9ks5Leqaq9p8k/Y2kpyX9qaT1Va/dKWlc0vOSbq6q70y1cUl3VNWvlvS4pLOSHpC0tp3/QDMza9xyRg5/Auy8pHYS+GhE/Czwt8CdAJKuAXYDH0nvOSxpQNIA8AfALcA1wGdTW4CvAHdHxHbgdWBfS/8iMzNr2ZLhEBHfBi5cUvvziLiYFr8DbE7PdwHHI+LtiPghMA5cl77GI+IHEfEOcBzYJUnAJ4CH0vuPAbe2+G8yM7MWrW7DOm4DHkjPN5GFRcVkqgG8eEn948AG4I2qoKluX0PSCDACsG7dup//8Ic/3HLnzcxWktOnT78aERuXatdSOEg6CFwEypVSnWZB/RFKLNK+rogYBUYBSqVSjI2NNdRfM7OVTtLEcto1HQ6S9gC/DNwYcxdomgS2VDXbDLyUntervwqsl7Q6jR6q25uZWY80NZVV0k7gS8CvRMR01UsngN2SLpN0NbAd+C7wBLA9zUxaS3bQ+kQKlceAT6X37wEebu6fYmZm7bKcqaxfB/4K+JCkSUn7gN8HfhI4KekpSX8IEBHPAg8CzwF/BtweEbNpVPBF4FHgDPBgagtZyPyapHGyYxBH2/ovNDOzhqmol+z2MQczs8ZJOh0RpaXa+QxpMzOr4XCwwimXYWgIVq3KHsvlpd5hZo1qx3kOZl1TLsPICEynaRATE9kywPBw7/pl1m88crBCOXhwLhgqpqezupm1j8PBCuXcucbqZtYch4MVytatjdXNrDkOByuUQ4dgcHB+bXAwq5tZ+zgcrFCGh2F0FLZtAyl7HB31wWizdvNsJSuc4WGHgVmneeRgZmY1HA5WOD4JzqzzvFvJCqVchr17YWYmW56YyJbBu5rM2skjByuUAwfmgqFiZiarm1n7OBysUF57rbG6mTXH4WBmZjUcDmZmVsPhYGZmNRwOZmZWw+FgZmY1HA5mZlbD4WCFMjDQWN3MmuNwsEKp3BJ0uXUza44vn2GFcvhw9jg6CrOz2YhhZGSubmbt4XCwwjl82GFg1mnerWRmZjUcDmZmVmPJcJB0r6Tzkp6pql0h6aSks+nx8lSXpK9KGpf0tKRrq96zJ7U/K2lPVf3nJX0/veerktTuf6SZmTVmOSOHPwF2XlK7AzgVEduBU2kZ4BZge/oaAY5AFibAXcDHgeuAuyqBktqMVL3v0u9lZmZdtmQ4RMS3gQuXlHcBx9LzY8CtVfX7I/MdYL2kq4CbgZMRcSEiXgdOAjvTa++LiL+KiADur1qXmZn1SLPHHD4QES8DpMf3p/om4MWqdpOptlh9sk69LkkjksYkjU1NTTXZdTMzW0q7D0jXO14QTdTriojRiChFRGnjxo1NdtHMzJbSbDi8knYJkR7Pp/oksKWq3WbgpSXqm+vUzcysh5oNhxNAZcbRHuDhqvrn0qyl64E3026nR4GbJF2eDkTfBDyaXvuxpOvTLKXPVa3LzMx6ZMkzpCV9HbgBuFLSJNmso98GHpS0DzgHfDo1fwT4JDAOTAN7ASLigqTfBJ5I7b4cEZWD3F8gmxH1XuB/pC8z
M+shZZOEiqdUKsXY2Fivu2E9UC7DwYNw7hxs3QqHDsHwcK97ZVYMkk5HRGmpdr62khVKuZxdaG96OluemJi7IqsDwqx9fPkMK5SDB+eCoWJ6OqubWfs4HKxQzp1rrG5mzXE4WKFs3dpY3cya43CwQjl0CAYH59cGB7O6mbWPw8EKZXg4uwvctm0gZY+joz4YbdZunq1khTM87DAw6zSPHMzMrIbDwczMajgczMyshsPBzMxqOBzMzKyGw8EKp1yGoSFYtSp7LJd73SOz/uOprFYovvCeWXd45GCF4gvvmXWHw8EKxRfeM+sOh4MVii+8Z9YdDgcrlEOHYO3a+bW1a33hPbN2czhY4Vx6Z9uC3unWLNccDlYoBw/CzMz82syMD0ibtZvDwQrFB6TNusPhYIXiA9Jm3eFwsELxneDMusPhYIXiO8GZdYcvn2GF4zvBmXVeSyMHSf9W0rOSnpH0dUnvkXS1pMclnZX0gKS1qe1laXk8vT5UtZ47U/15STe39k+yfucL75l1XtPhIGkT8G+AUkR8FBgAdgNfAe6OiO3A68C+9JZ9wOsR8UHg7tQOSdek930E2AkcljTQbL+sv1UuvDcxkZ3fULnwngPCrL1aPeawGnivpNXAIPAy8AngofT6MeDW9HxXWia9fqMkpfrxiHg7In4IjAPXtdgv61O+8J5ZdzQdDhHxd8DvAOfIQuFN4DTwRkRcTM0mgU3p+SbgxfTei6n9hup6nffMI2lE0piksampqWa7bgU2MdFY3cya08pupcvJ/uq/GvgHwDrgljpNKxc30AKvLVSvLUaMRkQpIkobN25svNNWeAML7HBcqG5mzWllt9IO4IcRMRURM8A3gX8CrE+7mQA2Ay+l55PAFoD0+k8BF6rrdd5jNs/sbGN1M2tOK+FwDrhe0mA6dnAj8BzwGPCp1GYP8HB6fiItk17/VkREqu9Os5muBrYD322hX9bHVG+cuUjdzJrT9HkOEfG4pIeA7wEXgSeBUeC/A8cl/VaqHU1vOQp8TdI42Yhhd1rPs5IeJAuWi8DtEeG/A62uha7A6iuzmrWXoqCfqlKpFGNjY73uhnXZYiOEgv4om3WVpNMRUVqqnS+fYWZmNRwOZmZWw+FgheKprGbd4XCwQvFUVrPucDhYoVx2WWN1M2uOw8EK5e23G6ubWXMcDmZmVsPhYGZmNRwOVig+5mDWHQ4HK5SjR2vPkpayupm1j8PBCmV4GL72Ndi2LQuFbduyZd9T2qy9mr7wnlmvDA87DMw6zSMHK5xyGYaGYNWq7NH3jzZrP48crFDKZdi7F2ZmsuWJiWwZPJowayePHKxQDhyYC4aKmZmsbmbt43CwQnnttcbqZtYch4OZmdVwOJiZWQ2HQ4d5Zk17rVvXWN3MmuPZSh1ULsPICExPZ8sTE9kyeGZNsxa6h/Ri95Y2s8Z55NBBBw/OBUPF9HRWt+a89VZjdTNrjsOhg86da6xuZpYXDocO2rq1sbotbcOGxupm1hyHQwcdOgSDg/Nrg4NZ3Zpzzz3Zwf1qq1ZldTNrH4dDBw0Pw+jo/CuIjo76YHSr6l2y28zaSxHR6z40pVQqxdjYWK+7YV125ZX1z4besAFefbX7/TErGkmnI6K0VLuWRg6S1kt6SNLfSDoj6R9LukLSSUln0+Plqa0kfVXSuKSnJV1btZ49qf1ZSXta6ZP1N18+w6w7Wt2tdA/wZxHxYeAfAWeAO4BTEbEdOJWWAW4BtqevEeAIgKQrgLuAjwPXAXdVAsXMzHqj6XCQ9D7gl4CjABHxTkS8AewCjqVmx4Bb0/NdwP2R+Q6wXtJVwM3AyYi4EBGvAyeBnc32y/qbz5A2645WRg4/DUwB90l6UtIfS1oHfCAiXgZIj+9P7TcBL1a9fzLVFqrXkDQiaUzS2NTUVAtdt6LyGdJm3dFKOKwGrgWORMTHgP/L3C6keup9fGORem0xYjQiShFR2rhxY6P9tT7gM6TNuqOVcJgEJiPi8bT8
EFlYvJJ2F5Eez1e131L1/s3AS4vUzcysR5oOh4j4P8CLkj6USjcCzwEngMqMoz3Aw+n5CeBzadbS9cCbabfTo8BNki5PB6JvSjUzM+uRVq/K+q+BsqS1wA+AvWSB86CkfcA54NOp7SPAJ4FxYDq1JSIuSPpN4InU7ssRcaHFfpmZWQt8EpwVymIHngv6o2zWVV05Cc6Wtn8/rF6d/VJbvTpbNjPLO9/sp4P274cjR+aWZ2fnlg8f7k2fim7VKnj33fp1M2sff6Q6aHS0sbotrV4wLFY3s+Y4HDpodraxui1t27bG6mbWHIdDBw0MNFa3pR06BGvWzK+tWeN7ZJi1m8Ohg0ZGGqvb8ly6C8m7lMzaz+HQQYcPwxe+MDdSGBjIln0wunkHDtTulpudzepm1j4+z8EKxec5mLXG5zmYmVnTHA5mZlbD4dBh5TIMDWUnaQ0NZctmZnnnM6Q7qFyGvXthZiZbnpjIlgGGh3vXLzOzpXjk0EEHDswFQ8XMjGfWmFn+ORw66LXXGqvb0nybULPucDhYoSw0XdXTWM3ay+HQQRs2NFY3M8sLh0MH3XMPrF07v7Z2bVY3M8szh0MHDQ/Dvn3zL5+xb59nKplZ/jkcOqhchmPH5q4FNDubLftcBzPLO4dDBx08CNPT82vT01ndzCzPHA4dNDHRWN2W5ntkmHWHw6GD/Ius/W64obG6mTXH4dBBvk1o+z31VGN1M2uOw6GDfL/j9vNZ52bd4XDooEOHYPUllzZcvdr3Ozaz/HM4dNBf/iVcvDi/dvFiVjczy7OWw0HSgKQnJf23tHy1pMclnZX0gKS1qX5ZWh5Prw9VrePOVH9e0s2t9ikvRkcbq5uZ5UU7Rg4HgDNVy18B7o6I7cDrwL5U3we8HhEfBO5O7ZB0DbAb+AiwEzgsqS/m8/iAtJkVVUvhIGkz8M+AP07LAj4BPJSaHANuTc93pWXS6zem9ruA4xHxdkT8EBgHrmulX3mR56msO3Zkl7mufO3Y0esemVmetDpy+D3g3wPvpuUNwBsRUdnTPglsSs83AS8CpNffTO3/f73Oe+aRNCJpTNLY1NRUi13vvLzOyd+xA06dml87dcoBYWZzmg4HSb8MnI+I09XlOk1jidcWe8/8YsRoRJQiorRx48aG+tsL4+ON1bvl0mBYqm5mK08r95D+BeBXJH0SeA/wPrKRxHpJq9PoYDPwUmo/CWwBJiWtBn4KuFBVr6h+T6GdO9dY3cwsL5oeOUTEnRGxOSKGyA4ofysihoHHgE+lZnuAh9PzE2mZ9Pq3IiJSfXeazXQ1sB34brP9ypPBwcbqZmZ50crIYSFfAo5L+i3gSeBoqh8FviZpnGzEsBsgIp6V9CDwHHARuD0i+mI+z9//fWN1M7O8UBT05rulUinGxsZ63Y1FLXbT+15u9rz2azmK3HezPJB0OiJKS7XzGdIdlOeprGZmi3E4dNCHPtRY3cwsLxwOHfT8843VzczywuHQQb58hpkVlcPBzMxqOBxWoFUL/K8vVDezlce/Dlagd99trG5mK4/DwczMajgczMyshsPBzMxqOBzMzKyGw8HMzGo4HMzMrIbDwczMajgcOmjt2sbq3bJtW2N1M1t5HA4ddPFiY/VuWbeusbqZrTwOhw7K65nIzz3XWN3MVh6Hg5mZ1XA4mJlZDYeDmZnVcDiYWU+UyzA0lF0qfmgoW7b8WN3rDpjZylMuw8gITE9nyxMT2TLA8HDv+mVzPHIws647eHAuGCqmp7O65YPDwRbkYb91yrlzjdWt+xwOVle5DLfdlg33I7LH225zQFh7bN3aWN26r+lwkLRF0mOSzkh6VtKBVL9C0klJZ9Pj5akuSV+VNC7paUnXVq1rT2p/VtKe1v9Z1qoDB+Cdd+bX3nknq5u16tAhGBycXxsczOqWD62MHC4C/y4ifga4Hrhd0jXAHcCpiNgOnErLALcA29PXCHAEsjAB7gI+DlwH3FUJFOud115rrG7WiOFh2LMH
Bgay5YGBbNkHo/Oj6XCIiJcj4nvp+Y+BM8AmYBdwLDU7Btyanu8C7o/Md4D1kq4CbgZORsSFiHgdOAnsbLZfZpZ/5TIcOwazs9ny7Gy27N2W+dGWYw6ShoCPAY8DH4iIlyELEOD9qdkm4MWqt02m2kL1et9nRNKYpLGpqal2dN0WsGFDY3WzRni2Uv61HA6SfgL4BvCrEfGjxZrWqcUi9dpixGhElCKitHHjxsY7a8t2zz2wZs382po1Wd2sVZ6tlH8thYOkNWTBUI6Ib6byK2l3EenxfKpPAluq3r4ZeGmRuvXQ8DB8/vPz9wl//vPeJ2zt4dlK+dfKbCUBR4EzEfG7VS+dACozjvYAD1fVP5dmLV0PvJl2Oz0K3CTp8nQg+qZUsx7yPmHrpA9+sLG6dZ8i6u7BWfqN0i8CfwF8H6jcoeDXyY47PAhsBc4Bn46ICylMfp/sYPM0sDcixtK6bkvvBTgUEfct9f1LpVKMjY011fduUb0dZkmTm70tltOvoaHs3IZLbdsGL7zQiV4tT163qTVm9eq5PzyqDQz0/mZY/U7S6YgoLdmu2XDoNYdD85bTryL33fLP/4+9s9xw8BnSBbJ/f/YXl5Q97t/fue9VOdaw3LqZ9RdflbUg9u+HI0fmlmdn55YPH27/96s35F+sbmb9xSOHghgdbaxuZtYKh0NB5PkveV+91fqJf54z3q1kLSmXYe9emJnJlicmsmXwORFWPL4J0RzPVuqgds7I6Pa6lvv9rryy/sX4NmyAV19trF/L4Vku/SGv/495ncLdTp6tZF3hq7dap3Vzll69YFis3s+8W8nMcqvbs/Sk+iOXxUY6/cojB2uJr95qndTtWXoL7dJaibssHQ7Wks98Znl1zwCxZuR5ll6/824la8kjjyxd94wms+LxyMFaspzr8h84MBcMFTMzvh+1WZ45HKzjGpnR5N1PZvngcLCWtPMAXuUEpImJ7P2VE5AcENYt69cvv97vf8g4HCw3fF9h67UfLXCj40vrK+EPGYeD5cZyjl+sWuAndqG6WSPefXd59ZXwh4w/UpYby7mv8HI/vNYZ/b4rZblWwpnUnspqubFuXWN16y5flG7OwMDCtzntFx45WG4899zSdZ+R3TsrYVfKcq2Ek/McDlYo99wDa9bMr61Zk9Wts1bCrhSb43CwQhkehvvuyy6hLGWP99238nZr9IInA6wsPuZghTM87DDoBU8GWFmc+WZmVsPhYH2rmzeJMeu0HTuyn+XK144dnf1+DgfrS5WbxFRmj1RuEuOAsCLasQNOnZpfO3WqswGxosKh28lrvVN997Dl1G3+Z6PyZflwaTAsVW+H3ISDpJ2Snpc0LumOdq+/keRdzoeknR+kvK4rr3qxvdr5M5HHdS22/mZ0++ewndtrJfRrOXIRDpIGgD8AbgGuAT4r6Zp2fo/lJu9yPiTt/CDldV151Yvt1c6fibyuq526/T3bub3aKa/9Wq68TGW9DhiPiB8ASDoO7AIWOGe2eUG9P5uqX1+Elt/G6/K6vC6vq1Pr0uLvaotcjByATcCLVcuTqTaPpBFJY5LGpqamutY5M7O8mKz91dgReRk51BtA1URjRIwCowClUqmp6KyXuNU3pllsKFdpt5w2XpfX5XV5Xd1YV6fkZeQwCWypWt4MvNSjvpiZrXh5CYcngO2Srpa0FtgNnGjnN1ju7SyX087r8rq8Lq8rj+tqp1zsVoqIi5K+CDwKDAD3RsSz7f8+7WvndXldXpfXlcd1tUsuwgEgIh4BHul1P8zMLD+7lczMLEccDmZmVsPhYGZmNRwOZmZWQ9HtQ+BtImkKmLikfCXwag+60y7uf+8Uue9Q7P4Xue9QvP5vi4iNSzUqbDjUI2ksIkq97kez3P/eKXLfodj9L3Lfofj9X4h3K5mZWQ2Hg5mZ1ei3cBjtdQda5P73TpH7DsXuf5H7DsXvf119dczBzMzao99GDmZm1gYOBzMzq9E34SBpp6TnJY1LuqPX
/WmUpBckfV/SU5LGet2fxUi6V9J5Sc9U1a6QdFLS2fR4eS/7uJgF+v8bkv4ubf+nJH2yl31ciKQtkh6TdEbSs5IOpHohtv8i/c/99pf0HknflfTXqe//IdWvlvR42vYPpNsOFF5fHHOQNAD8LfBPyW4c9ATw2Yho+z2oO0XSC0ApInJ/Mo2kXwLeAu6PiI+m2n8ELkTEb6dwvjwivtTLfi5kgf7/BvBWRPxOL/u2FElXAVdFxPck/SRwGrgV+JcUYPsv0v/PkPPtL0nAuoh4S9Ia4H8BB4BfA74ZEccl/SHw1xFxpJd9bYd+GTlcB4xHxA8i4h3gOLCrx33qWxHxbeDCJeVdwLH0/BjZBz6XFuh/IUTEyxHxvfT8x8AZsvutF2L7L9L/3IvMW2lxTfoK4BPAQ6me223fqH4Jh03Ai1XLkxTkB65KAH8u6bSkkV53pgkfiIiXIfsFALy/x/1pxhclPZ12O+Vyt0w1SUPAx4DHKeD2v6T/UIDtL2lA0lPAeeAk8L+BNyLiYmpSxN89dfVLONS7/XbR9pf9QkRcC9wC3J52fVj3HAH+IfBzwMvAf+5tdxYn6SeAbwC/GhE/6nV/GlWn/4XY/hExGxE/R3af++uAn6nXrLu96ox+CYdJYEvV8mbgpR71pSkR8VJ6PA/8KdkPXpG8kvYnV/Yrn+9xfxoSEa+kD/67wB+R4+2f9nd/AyhHxDdTuTDbv17/i7T9ASLiDeB/AtcD6yVV7qpZuN89C+mXcHgC2J5mDawFdgMnetynZZO0Lh2cQ9I64CbgmcXflTsngD3p+R7g4R72pWGVX6zJPyen2z8dFD0KnImI3616qRDbf6H+F2H7S9ooaX16/l5gB9kxk8eAT6Vmud32jeqL2UoAaerb7wEDwL0RcajHXVo2ST9NNlqA7L7e/yXP/Zf0deAGsksVvwLcBfxX4EFgK3AO+HRE5PKg7wL9v4Fsl0YALwD/qrIPP08k/SLwF8D3gXdT+dfJ9tvnfvsv0v/PkvPtL+lnyQ44D5D9Yf1gRHw5fX6PA1cATwL/IiLe7l1P26NvwsHMzNqnX3YrmZlZGzkczMyshsPBzMxqOBzMzKyGw8HMzGo4HMzMrIbDwczMavw/I464DC+P+NoAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.scatter(X, y, color='blue', label='actual')  # blue points: distribution of the true values\n",
    "# X follows the row order of df rather than being sorted, so drawing the line in\n",
    "# that order adds spurious segments that jump back and forth across the plot.\n",
    "# Order the rows by feature value before drawing the prediction line.\n",
    "order = X[:, 0].argsort()\n",
    "plt.plot(X[order], model.predict(X[order]), color='red', label='predicted')  # red line: model predictions for X\n",
    "plt.xlabel('family')\n",
    "plt.ylabel('sales')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 查看模型对前 5 行数据的预测，并将其与我们的目标数组进行比较。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 0 0 0]\n"
     ]
    }
   ],
   "source": [
    "# Predicted class labels for the first five rows of X.\n",
    "print(model.predict(X[:5])) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 0 0 0]\n"
     ]
    }
   ],
   "source": [
    "# First five entries of the target array, for comparison with the predictions above.\n",
    "print(y[:5])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3.给模型打分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6031\n",
      "0.6031\n"
     ]
    }
   ],
   "source": [
    "# Predict every row of X and compare the result with the target array y.\n",
    "y_pred = model.predict(X)\n",
    "n_correct = (y_pred == y).sum()  # number of predictions equal to the true value\n",
    "print(n_correct)\n",
    "print(model.score(X, y))  # mean accuracy of the model on (X, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 改进模型：增加特征 store_nbr 后重新训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\86178\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:818: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
      "\n",
      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
      "Please also refer to the documentation for alternative solver options:\n",
      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
      "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG,\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6031\n",
      "0.6031\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "\n",
    "# With the default iteration limit the lbfgs solver stopped before converging\n",
    "# (ConvergenceWarning), so give it a larger iteration budget.\n",
    "# NOTE(review): if the warning persists, raise max_iter further or scale the features.\n",
    "model2 = LogisticRegression(max_iter=1000)\n",
    "# The two-feature matrix gets its own name so that the single-feature X used by\n",
    "# `model` is not overwritten; the earlier cells calling model.predict(X) then\n",
    "# still work when they are re-run after this one.\n",
    "X2 = df[['store_nbr', 'family']].values  # store_nbr added as a second feature\n",
    "y = df['sales'].values.astype(int)  # target array: sales cast to integers, used as class labels\n",
    "model2.fit(X2, y)  # train the model\n",
    "y_pred2 = model2.predict(X2)\n",
    "print((y == y_pred2).sum())  # number of predictions equal to the true value\n",
    "print(model2.score(X2, y))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
